Change the positions of the galaxies in a SHAM to be shuffled then NFW-distributed, instead of on the subhalos.

Shuffling procedure is as followed, from Jeremey

procedure:

take a bin in halo mass (small bins, like 0.1dex wide). (this is all halos, regardless of whether they have a galaxy in them or not). take all the centrals and put them in a list. take all the satellites and put them in a separate list.

randomly assign the centrals to all the halos in the bin.

randomly assign each satellite to a halo in the bin (repeat until all satellites are gone. this should preserve poisson distribution of satellite occupation). when assigning a satellite to a halo, preserve the position of the satellite and velocity of the satellite relative to the original host halo. ie, your list of satllites has dx, dy, dz, and dvx, dvy, dvz, then you add x, y, z, and vx, vy, vz of the new halo to those quantities.


In [15]:
import numpy as np
import astropy
from itertools import izip
from halotools.utils.table_utils import compute_conditional_percentiles
from halotools.utils import *

In [16]:
from halotools.mock_observables import tpcf

In [17]:
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
Lbox = 1000.0 #catalog = np.loadtxt('ab_sham_hod_data_cut.npy') ab_property = 'halo_vmax@mpeak' catalog = astropy.table.Table.read('../catalog_ab_%s.hdf5'%ab_property, format = 'hdf5')

In [18]:
Lbox = 1000.0
#catalog = np.loadtxt('ab_sham_hod_data_cut.npy')
ab_property = 'halo_vmax@mpeak'
sh_catalog = astropy.table.Table.read('../catalog_ab_%s_shuffled.hdf5'%ab_property, format = 'hdf5')
catalog = astropy.table.Table.read('../catalog_ab_%s.hdf5'%ab_property, format = 'hdf5')

In [34]:
catalog.colnames


Out[34]:
['halo_upid',
 'halo_y',
 'halo_x',
 'halo_z',
 'halo_vmax@mpeak',
 'halo_rvir',
 'halo_mpeak',
 'halo_id',
 'halo_vx',
 'halo_vy',
 'halo_vz',
 'halo_rs',
 'halo_mvir',
 'halo_nfw_conc',
 'halo_hostid',
 'halo_mvir_host_halo',
 'gal_smass']

In [20]:
from halotools.mock_observables import tpcf

In [42]:
PMASS = 7.62293e+07

In [21]:
nd = 4.2e-4 #nd of final cat 
n_obj_needed = int(nd*(Lbox**3))
sort_idxs = np.argsort(catalog['gal_smass'])
cut_catalog = catalog[sort_idxs[:n_obj_needed]]

print len(catalog)


95969198

In [35]:
print np.min(np.log10(catalog['halo_mvir_host_halo']))


9.6885

In [37]:
print np.min(np.log10(cut_catalog['halo_mvir_host_halo']))


10.4666

In [59]:
print np.sum(np.isnan(catalog['gal_smass']))*1.0/len(catalog)
print np.sum(np.isnan(cut_catalog['gal_smass']))*1.0/len(cut_catalog)


0.988008058586
0.0

In [57]:
N = 1e3
print np.sum(np.log10(catalog['halo_mvir'])> np.log10(N*PMASS))*1.0/len(catalog)
print np.sum(np.log10(cut_catalog['halo_mvir'])> np.log10(N*PMASS))*1.0/len(cut_catalog)


0.500190696602
0.998457142857

In [55]:
N = 1e4 #host halo
print np.log10(N*PMASS)


11.8821219317

In [27]:
np.sum(np.isnan(catalog['gal_smass']))/(1.0*len(catalog))


Out[27]:
0.98800805858563079

In [32]:
1-n_obj_needed*1.0/len(catalog)


Out[32]:
0.9956235958124814

In [29]:
plt.hist(cut_catalog['gal_smass'])


Out[29]:
(array([  9.18000000e+02,   9.91000000e+02,   5.90000000e+01,
          1.13500000e+03,   2.91800000e+03,   5.94800000e+03,
          1.80710000e+04,   5.95980000e+04,   1.14033000e+05,
          2.16329000e+05]),
 array([ 10.00875948,  10.11532879,  10.22189809,  10.3284674 ,
         10.43503671,  10.54160601,  10.64817532,  10.75474463,
         10.86131394,  10.96788324,  11.07445255]),
 <a list of 10 Patch objects>)

In [7]:
rbins = np.logspace(-1, 1.6, 18)
xi = tpcf(np.c_[catalog['halo_x'], catalog['halo_y'], catalog['halo_z']],rbins, period=Lbox )
sh_xi = tpcf(np.c_[sh_catalog['halo_x'], sh_catalog['halo_y'], sh_catalog['halo_z']],rbins, period=Lbox )

In [8]:
print xi
print sh_xi


[  4.80874943e+03   2.42259614e+03   1.20103766e+03   5.98568275e+02
   2.60745618e+02   1.01677987e+02   4.19736285e+01   1.95523466e+01
   1.02258308e+01   5.87522450e+00   3.40091228e+00   1.91927758e+00
   1.07193372e+00   5.85641026e-01   3.00545765e-01   1.46694083e-01
   6.29470435e-02]
[  4.75969979e+03   2.42961815e+03   1.18935385e+03   5.91050177e+02
   2.60028911e+02   1.01831894e+02   4.17086266e+01   1.93981973e+01
   1.02877413e+01   5.83752915e+00   3.37164758e+00   1.91109642e+00
   1.06754559e+00   5.81531936e-01   2.99723727e-01   1.46308219e-01
   6.27898046e-02]

In [9]:
rbc = (rbins[1:]+rbins[:-1])/2
plt.plot(rbc, xi/sh_xi)
#plt.plot(rbc,sh_xi)
plt.xscale('log')


catalog = catalog[catalog['halo_mvir'] > 200*PMASS]
plt.hist(catalog['halo_x'])

In [5]:
catalog.colnames


Out[5]:
['halo_upid',
 'halo_y',
 'halo_x',
 'halo_z',
 'halo_vmax@mpeak',
 'halo_rvir',
 'halo_mpeak',
 'halo_id',
 'halo_vx',
 'halo_vy',
 'halo_vz',
 'halo_rs',
 'halo_mvir',
 'halo_nfw_conc',
 'halo_hostid',
 'halo_mvir_host_halo',
 'gal_smass']

In [6]:
print len(catalog)


95969198
print np.sum(catalog['halo_mvir_host_halo'])

In [7]:
add_halo_hostid(catalog, delete_possibly_existing_column=True)

In [8]:
for prop in ['halo_x', 'halo_y', 'halo_z','halo_vx', 'halo_vy', 'halo_vz', 'halo_nfw_conc', 'halo_rvir']:
    broadcast_host_halo_property(catalog, prop, delete_possibly_existing_column=True)

In [9]:
len(catalog)


Out[9]:
95969198

In [10]:
catalog.colnames


Out[10]:
['halo_upid',
 'halo_y',
 'halo_x',
 'halo_z',
 'halo_vmax@mpeak',
 'halo_rvir',
 'halo_mpeak',
 'halo_id',
 'halo_vx',
 'halo_vy',
 'halo_vz',
 'halo_rs',
 'halo_mvir',
 'halo_nfw_conc',
 'halo_mvir_host_halo',
 'gal_smass',
 'halo_hostid',
 'halo_x_host_halo',
 'halo_y_host_halo',
 'halo_z_host_halo',
 'halo_vx_host_halo',
 'halo_vy_host_halo',
 'halo_vz_host_halo',
 'halo_nfw_conc_host_halo',
 'halo_rvir_host_halo']

In [11]:
from halotools.utils.table_utils import compute_prim_haloprop_bins
from math import ceil
min_log_mass = np.log10(np.min(catalog['halo_mvir']))-0.001
max_log_mass = np.log10(np.max(catalog['halo_mvir']))+0.001
print min_log_mass, max_log_mass
dlog10_prim_haloprop = 0.1
num_prim_haloprop_bins = (max_log_mass - min_log_mass) / dlog10_prim_haloprop
prim_haloprop_bin_boundaries = np.logspace(min_log_mass, max_log_mass,
    num=int(ceil(num_prim_haloprop_bins)))

prim_haloprop_bins = compute_prim_haloprop_bins(prim_haloprop = catalog['halo_mvir_host_halo'],\
                                                dlog10_prim_haloprop=dlog10_prim_haloprop,
                                                prim_haloprop_bin_boundaries = prim_haloprop_bin_boundaries)


9.68749945068 15.2108903656

In [12]:
shuffled_pos = np.zeros((len(catalog), 3))
shuffled_vel = np.zeros((len(catalog), 3))
shuffled_upids = np.zeros((len(catalog)))
shuffled_host_mvir = np.zeros((len(catalog)))

In [13]:
shuffled_mags = np.zeros((len(catalog), 1))
#shuffled_mags[:, 0] = catalog['halo_vpeak_mag']
#shuffled_mags[:, 1] = catalog['halo_vvir_mag']
#shuffled_mags[:, 2] = catalog['halo_alpha_05_mag']

In [15]:
bins_in_halocat = set(prim_haloprop_bins)

for ibin in bins_in_halocat:
    print ibin
    #if ibin > 25:
    #    continue
    indices_of_prim_haloprop_bin = np.where(prim_haloprop_bins == ibin)[0]
    
    centrals_idx = np.where(catalog[indices_of_prim_haloprop_bin]['halo_upid'] == -1)[0]
    n_centrals = len(centrals_idx)
    satellites_idx = np.where(catalog[indices_of_prim_haloprop_bin]['halo_upid']!=-1)[0]
    n_satellites = len(satellites_idx)
    
    if centrals_idx.shape[0]!=0:
        rand_central_idxs = np.random.choice(indices_of_prim_haloprop_bin[centrals_idx], size = n_centrals, replace = False)
    else:
        rand_central_idxs = np.array([])
        
    shuffled_mags[indices_of_prim_haloprop_bin[centrals_idx],0]= \
            catalog[rand_central_idxs]['gal_smass']

    shuffled_mags[indices_of_prim_haloprop_bin[satellites_idx],0] = \
            catalog[indices_of_prim_haloprop_bin[satellites_idx]]['gal_smass']
    #Create second rand_central_idxs, Iterate through satellite hosts and assign them when they match. 
                
    for idx, coord in enumerate(['x','y','z']):
        # don't need to shuffle positions cu we've shuffled mags for centrals
        shuffled_pos[indices_of_prim_haloprop_bin[centrals_idx], idx] = \
                catalog[indices_of_prim_haloprop_bin[centrals_idx]]['halo_'+coord]
            
        shuffled_vel[indices_of_prim_haloprop_bin[centrals_idx], idx] = \
                catalog[indices_of_prim_haloprop_bin[centrals_idx]]['halo_v'+coord]
            
    shuffled_upids[indices_of_prim_haloprop_bin[centrals_idx]] = -1
    
    shuffled_host_mvir[indices_of_prim_haloprop_bin[centrals_idx]] = \
            catalog[indices_of_prim_haloprop_bin[centrals_idx]]['halo_mvir']
        
    hosts_id, first_sat_idxs = np.unique(catalog[indices_of_prim_haloprop_bin[satellites_idx]]['halo_upid'], return_index=True)
    shuffled_idxs = np.random.permutation(hosts_id.shape[0])
    shuffled_hosts_id = hosts_id[shuffled_idxs]
    shuffled_sat_idxs = first_sat_idxs[shuffled_idxs]
    shuffled_arrays_idx = 0
    host_map = dict() #maps the current host id to the index of a new host id. 
    #the host_id -> idx map is easier than the host_id -> host_id map
    #print 'lsi', len(satellites_idx)
    for sat_idx in satellites_idx:
        host_id = catalog[indices_of_prim_haloprop_bin][sat_idx]['halo_upid']
        
        if host_id in host_map:
            new_host_id, hosts_old_satellite_idx = host_map[host_id]
        else:
            new_host_id = shuffled_hosts_id[shuffled_arrays_idx]
            hosts_old_satellite_idx = shuffled_sat_idxs[shuffled_arrays_idx]
            host_map[host_id] = (new_host_id, hosts_old_satellite_idx)
            shuffled_arrays_idx+=1
            
        shuffled_upids[indices_of_prim_haloprop_bin[sat_idx]] = new_host_id
        
        shuffled_host_mvir[indices_of_prim_haloprop_bin[sat_idx]] = \
                catalog[indices_of_prim_haloprop_bin[satellites_idx]][hosts_old_satellite_idx]['halo_mvir_host_halo']
        
        for idx, coord in enumerate(['x','y','z']):
            shuffled_pos[indices_of_prim_haloprop_bin[sat_idx], idx] = \
                    (catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_'+coord] -\
                    catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_'+coord+'_host_halo']+\
                    catalog[indices_of_prim_haloprop_bin[satellites_idx]][hosts_old_satellite_idx]['halo_'+coord+'_host_halo'])%Lbox
            #print catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_'+coord]
            #print catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_'+coord+'_host_halo']
            #print catalog[indices_of_prim_haloprop_bin[satellites_idx]][hosts_old_satellite_idx]['halo_'+coord+'_host_halo']
            #print '*'*50       
            shuffled_vel[indices_of_prim_haloprop_bin[sat_idx], idx] = \
                    (catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_v'+coord] -\
                    catalog[indices_of_prim_haloprop_bin[sat_idx]]['halo_v'+coord+'_host_halo']+\
                    catalog[indices_of_prim_haloprop_bin[satellites_idx][hosts_old_satellite_idx]]['halo_v'+coord+'_host_halo'])


1
2
3
4
5
6
7
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-15-5e89e9d9b85e> in <module>()
     41     #print 'lsi', len(satellites_idx)
     42     for sat_idx in satellites_idx:
---> 43         host_id = catalog[indices_of_prim_haloprop_bin][sat_idx]['halo_upid']
     44 
     45         if host_id in host_map:

/u/ki/swmclau2/.conda/envs/hodemulator/lib/python2.7/site-packages/astropy/table/table.pyc in __getitem__(self, item)
   1229             # is produced by np.where, as in t[np.where(t['a'] > 2)]
   1230             # For all, a new table is constructed with slice of all columns
-> 1231             return self._new_from_slice(item)
   1232         else:
   1233             raise ValueError('Illegal type {0} for table item access'

/u/ki/swmclau2/.conda/envs/hodemulator/lib/python2.7/site-packages/astropy/table/table.pyc in _new_from_slice(self, slice_)
    762         for col in cols:
    763             col.info._copy_indices = self._copy_indices
--> 764             newcol = col[slice_]
    765             if col.info.indices:
    766                 newcol = col.info.slice_indices(newcol, slice_, len(col))

astropy/table/_column_mixins.pyx in astropy.table._column_mixins._ColumnGetitemShim.__getitem__ (astropy/table/_column_mixins.c:1100)()

astropy/table/_column_mixins.pyx in astropy.table._column_mixins.base_getitem (astropy/table/_column_mixins.c:992)()

astropy/table/_column_mixins.pyx in astropy.table._column_mixins.column_getitem (astropy/table/_column_mixins.c:1040)()

/u/ki/swmclau2/.conda/envs/hodemulator/lib/python2.7/site-packages/astropy/table/column.pyc in __array_finalize__(self, obj)
    267         return self.data.__ne__(other)
    268 
--> 269     def __array_finalize__(self, obj):
    270         # Obj will be none for direct call to Column() creator
    271         if obj is None:

KeyboardInterrupt: 

In [ ]:
print len(visited_idxs)
print len(catalog)
print len(catalog) - len(visited_idxs)

In [ ]:
print len(visited_idxs2)
print len(catalog)
print len(catalog) - len(visited_idxs2)

In [ ]:
plt.plot(rbc, sham_xi/shuffled_xi)
plt.xscale('log')

In [ ]:
catalog['gal_smass'] = shuffled_mags[:,0]
catalog['halo_x'] = shuffled_pos[:,0]
catalog['halo_y'] = shuffled_pos[:,1]
catalog['halo_z'] = shuffled_pos[:,2]
catalog['halo_vx'] = shuffled_vel[:,0]
catalog['halo_vy'] = shuffled_vel[:,1]
catalog['halo_vz'] = shuffled_vel[:,2]
catalog['halo_upid']=shuffled_upids[:]
catalog['halo_mvir_host_halo'] = shuffled_host_mvir[:]

In [ ]:
del catalog['halo_vmax@mpeak']
del catalog['halo_rvir']
del catalog['halo_mpeak']
del catalog['halo_id']
del catalog['halo_rs']
del catalog['halo_nfw_conc']
del catalog['halo_hostid']
del catalog['halo_mvir_host_halo']
del catalog['halo_x_host_halo']
del catalog['halo_y_host_halo']
del catalog['halo_z_host_halo']
del catalog['halo_vx_host_halo']
del catalog['halo_vy_host_halo']
del catalog['halo_vz_host_halo']
del catalog['halo_rvir_host_halo']
del catalog['halo_nfw_conc_host_halo']

In [ ]:
nd = 4.2e-4 #nd of final cat 
n_obj_needed = int(nd*(1000.0**3))
sort_idxs = np.argsort(catalog['gal_smass'])
catalog = catalog[sort_idxs[:n_obj_needed]]

In [ ]:
catalog.colnames

In [ ]:
catalog.write('../catalog_ab_%s_shuffled.hdf5'%ab_property,
              format = 'hdf5', path = '../catalog_ab_%s_shuffled.hdf5'%ab_property, overwrite=True)

In [ ]:
'catalog_ab_%s_shuffled.hdf5'%ab_property

In [ ]:
%%bash
ls ../*.hdf5
cd ..; pwd; cd -

In [ ]:
%%bash
ls *.ipynb

In [ ]: